"""
author：fc
date：  2021/10/4
"""
#
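# Data normalization: z-score (standard-deviation) standardization.
# Formula: (x - mean) / std. Values above the column mean become positive and
# values below it become negative (an even split only holds for symmetric data).
# After the transform the column has mean 0 and standard deviation 1.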

import pandas as pd
import pymysql
# Load the whole `jd` table into a DataFrame and convert the `comment` and
# `price` columns to numeric types so they can be summarized and standardized.
# NOTE(review): the database credentials are hard-coded; consider reading them
# from configuration or environment variables.
con = pymysql.connect(host='localhost', user='root', passwd='root', db='datamine', autocommit=True)
sql = "select * from jd"
try:
    jd_books = pd.read_sql(sql=sql, con=con)
finally:
    # The rows are fully in memory once read_sql returns, so release the
    # connection right away -- also when the query raises.
    con.close()

# `comment` holds display text rather than numbers; presumably values look
# like "500+", "2万" or "1.5万+" -- TODO confirm against the table.
# "万" is the unit 10,000 and "+" only means "more than", so the sign is
# dropped and the unit is applied as a multiplier. regex=False keeps "+" a
# literal character regardless of the installed pandas version's default.
comment_text = jd_books['comment']
in_ten_thousands = comment_text.str.contains('万', regex=False, na=False)
comment_number = (
    comment_text
    .str.replace('+', '', regex=False)
    .str.replace('万', '', regex=False)
    .astype('float')
)
comment_number = comment_number.where(~in_ten_thousands, comment_number * 10000)
# round() guards against float artifacts such as 1.1 * 10000 == 11000.000000000002.
jd_books['comment'] = comment_number.round().astype('int')
jd_books['price'] = jd_books['price'].astype('float')

# Summary statistics before standardization.
print(jd_books.describe())

# Z-score standardization: replace each value with (x - mean) / std so that
# the column ends up with mean 0 and standard deviation 1.
# Series.std() is the sample standard deviation (ddof=1) by default.
for column_name in ('price', 'comment'):
    column_values = jd_books[column_name]
    centered = column_values - column_values.mean()
    jd_books[column_name] = centered / column_values.std()

# Summary statistics after standardization.
print(jd_books.describe())